#! /bin/sh
# PCP QA Test No. 430
#
# Test out pmlogger_check with duplicate directory names
# Test out pmlogger_check with directory names where one is a subname
# of the other.
#
# Note:
# 	This test assumes 127.0.0.2 is available ... for some platforms
# 	this may not be the case, e.g. Mac OS X, where it is necessary
# 	to arrange for the command below to be run in the reboot
# 	sequence ...
# 	# ifconfig lo0 alias 127.0.0.2
#
# Copyright (c) 1995-2002 Silicon Graphics, Inc.  All Rights Reserved.
#

# QA scripts are named after their sequence number
seq=`basename $0`
echo "QA output created by $seq"

# get standard filters
. ./common.product
. ./common.filter
. ./common.check

rm -f $seq.full

# realhost is whatever hostname(1) reports, LOCALHOST is the same name
# cut at the first dot
realhost=`hostname`
LOCALHOST=`echo $realhost | sed -e 's/\..*//'`
hostsfile="/etc/hosts"

# The hosts file gets rewritten later on and that needs an uncommented
# entry for the local hostname, so bail out early if there is none.
# "grep -E" is used rather than egrep: recent GNU grep releases print an
# obsolescence warning on stderr for egrep, and stderr is not redirected
# here.
grep -E "^[^#].*[[:space:]]$realhost([[:space:]]|\$)" <$hostsfile >/dev/null || \
    _notrun "No entry for hostname of local host in $hostsfile"

signal="$sudo $PCP_BINADM_DIR/pmsignal"
status=1	# failure is the default!
trap "_cleanup; exit \$status" 0 1 2 3 15

# stop pmlogger and keep a record of any pmlogger processes still
# present in $seq.full
_stop_auto_restart pmlogger
_service pmlogger stop | _filter_pcp_stop
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]ID|[p]mlogger' >>$here/$seq.full

# Scrub run-specific detail from the text on stdin (pmlogger_check output
# or a control file) and write the result to stdout:
#  - a run of two or more dots, the local hostname (long and short),
#    "local:", control file line numbers, /usr/var, $tmp, $PCP_TMP_DIR,
#    process ids and NNNNNNNN.NN.NN stamps are replaced by fixed tokens
#  - lines mentioning "Duplicate" or "<digit> inode=" are dropped
#  - everything after "Start:" / "End:" becomes "..."
#  - a line of ten or more dashes up to "/lock" becomes "..."
#  - "Called from:" sections vanish and "Contents of" sections are
#    reduced to their first line plus "...", both up to "--- end of"
# The result is finally passed through _filter_cron_scripts.
#
_filter()
{
    sed \
	-e 's/\.\.\.*/[dots]/' \
	-e "s/$realhost/LOCALHOST/g" \
	-e "s/$LOCALHOST/LOCALHOST/g" \
	-e "s/local:/LOCALHOST/g" \
	-e 's/control:[0-9][0-9]*]/control:LINE]/' \
	-e 's/control.d\/*:[0-9][0-9]*]/control:LINE]/' \
	-e 's;/usr/var;/var;g' \
	-e "s;$tmp;TMP;g" \
	-e "s;$PCP_TMP_DIR;PCP_TMP_DIR;" \
	-e '/Duplicate/d' \
	-e 's/process [0-9][0-9]*/process PID/' \
	-e '/[0-9] inode=/d' \
	-e 's/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.[0-9][0-9]\.[0-9][0-9]/CHECK/' \
	-e '/^Start: /s/ .*/ .../' \
	-e '/^End: /s/ .*/ .../' \
	-e 's/^----------.*\/lock/.../' \
    | $PCP_AWK_PROG '
# hiding is 1 while inside a section whose body is being suppressed
/^Called from:/	{ hiding = 1; next }
/^Contents of /	{ print; print "..."; hiding = 1; next }
hiding == 1	{
		  # the end marker is swallowed along with the body
		  if ($0 ~ /^--- end of/)
		      hiding = 0
		  next
		}
		{ print }' \
    | _filter_cron_scripts
}

# Send TERM to all pmlogger processes via the $signal command (-a),
# discarding anything it prints.  The exit status is that of $signal.
#
_stop_loggers()
{
    $signal -a -s TERM pmlogger \
	>/dev/null 2>&1
}

# Exit-time cleanup (run from the trap): restore the hosts file if a
# backup was saved, stop the test's pmloggers, remove the temporary
# files and bring the regular pmlogger service back.
#
_cleanup()
{
    echo
    echo "Cleaning up"

    # put the original hosts file back if _update_hostsfile saved one
    if [ -n "$tmp" ] && [ -f "$tmp.etc.hosts.orig" ]
    then
	$sudo cp "$tmp.etc.hosts.orig" $hostsfile
    fi

    _stop_loggers

    # get out of tmp directory so we can delete it
    cd

    # Only expand "$tmp.*" when $tmp is non-empty: with an empty $tmp
    # the pattern is ".*" and, after the cd above, the rm would be
    # aimed at the dot files in the home directory.
    if [ -n "$tmp" ]
    then
	$sudo rm -rf "$tmp" "$tmp".*
    fi

    $sudo $PCP_BINADM_DIR/pmlogger_check
    _service pmlogger restart >/dev/null 2>&1
    _restore_auto_restart pmlogger
    _wait_for_pmlogger
}

# Start each test case from a clean slate: terminate every pmlogger that
# _get_pids_by_name reports, then recreate $logdir and $logdir2 as empty,
# world-writable directories.
#
_setup()
{
    pids=$(_get_pids_by_name -a pmlogger)
    for pid in $pids
    do
	# TERM each one and wait for it to finish before moving on
	$signal -s TERM $pid >/dev/null 2>&1
	_wait_pmlogger_end $pid
    done

    # step out of the directories that are about to be removed
    cd
    $sudo rm -rf $logdir $logdir2
    mkdir $logdir $logdir2
    chmod a+w $logdir $logdir2
}

# This relies on 127.0.0.x being an alias for 127.0.0.1
# Also, place our real hostname entry first, then the
# 127.0.0.x alias so that the reverse hostname lookup in
# pmlogger gets the same hostname for the portmap files
# as the -h from the command line
#
# Usage: _update_hostsfile extra_hostname
#
# Rewrite $hostsfile so that the single $realhost entry comes first,
# followed by a new "127.0.0.N extra_hostname" line for the first N in
# 2..9 that is not already present, followed by the remaining original
# lines.  The original file is saved as $tmp.etc.hosts.orig.
# On failure a message goes to stdout, details go to $seq.full and the
# script exits.
#
_update_hostsfile()
{
    extra_hostname=$1

    # save old hosts file
    $sudo cp $hostsfile $tmp.etc.hosts.orig

    # update /etc/hosts
    # add an entry for extra_hostname
    rm -f $tmp.etc.hosts
    for i in 2 3 4 5 6 7 8 9
    do
	# Match the whole address: with the dots escaped and the end
	# anchored an entry like 127.0.0.20 no longer makes 127.0.0.2
	# look as though it is in use.
	if grep -E "^127\.0\.0\.$i([[:space:]]|\$)" $hostsfile >/dev/null
	then
	    :
	else
	    echo "# Hacked for QA/$seq" >$tmp.etc.hosts
	    echo "#" >>$tmp.etc.hosts
	    # there must be exactly one uncommented line naming $realhost
	    grep -E "^[^#].*[[:space:]]$realhost([[:space:]]|\$)" <$hostsfile >$tmp.tmp
	    if [ `wc -l <$tmp.tmp | sed -e 's/  *//g'` -ne 1 ]
	    then
		echo "Updating $hostsfile failed: no single $realhost entry: (see $seq.full):"
		echo "=== $hostsfile ===" >>$here/$seq.full
		cat $hostsfile >>$here/$seq.full
		echo "grep $realhost ..." >>$here/$seq.full
		cat $tmp.tmp >>$here/$seq.full
		exit
	    fi
	    cat $tmp.tmp >>$tmp.etc.hosts
	    # address and name are separated by a tab
	    printf '127.0.0.%s\t%s\n' "$i" "$extra_hostname" >>$tmp.etc.hosts
	    echo "" >>$tmp.etc.hosts
	    # the rest of the original file, minus the line moved to the top
	    grep -E -v "^[^#].*[[:space:]]$realhost([[:space:]]|\$)" <$hostsfile >>$tmp.etc.hosts
	    break
	fi
    done

    # extra check
    # we expect to make a change!
    if [ -s $tmp.etc.hosts ]
    then
	# create new hosts file
	$sudo cp $tmp.etc.hosts $hostsfile
    else
	echo "Updating $hostsfile failed: no 127.0.0.x available: (see $seq.full):"
	echo "=== $hostsfile ===" >>$here/$seq.full
	cat $hostsfile >>$here/$seq.full
	exit
    fi
}

# Usage: _extract_control N
#
# Write a control file for test case N to stdout: a fixed preamble, then
# the section of $tmp.control that starts at the "# N. ..." line and
# runs up to (not including) the next "# <number>. ..." line.
#
_extract_control()
{
    id=$1

    #preamble
    echo "# PCP archive logging configuration/control - for qa/$seq"
    echo "#"
    echo '$version=1.1'
    echo "#"

    $PCP_AWK_PROG -v id=$id '
	$1 == "#" {
		if ($2 == id ".") {
			# heading of the wanted section
			wanted = 1
			print
			next
		}
		if (wanted == 1 && $2 ~ /[0-9][0-9]*\./) {
			# heading of the following section, all done
			exit
		}
	}
	wanted == 1 { print }
    ' <$tmp.control
}

# real QA test starts here
rm -f $here/$seq.full

# host2 has host1 as a leading substring, this is the "subname" host case
host1=$LOCALHOST
host2="${LOCALHOST}super"
{
    echo "host1=$host1"
    echo "host2=$host2"
} >>$here/$seq.full

# add host2 to the hosts file, then record the new file and what pcp
# reports for host2 in $seq.full
_update_hostsfile $host2
{
    echo "=== $hostsfile ==="
    cat $hostsfile
    echo "=== check pcp on $host2 ==="
    pcp -h $host2
} >>$here/$seq.full

# logdir2 has logdir as a leading substring, this is the "subname"
# directory case
logdir=$tmp
logdir2="$tmp.subname"


#
# Master control file holding one numbered section per test case;
# _extract_control copies a single section out of it for each run.
# The here-document delimiter is unquoted, so $logdir, $logdir2, $host1
# and $host2 are expanded when the file is written.
#
# This global file should be extended to do more control
# file tests.
# Make sure you put the number in the form "# 3. " ... _extract_control
# finds a section by looking for a line whose first field is "#" and
# whose second field is the number followed by a dot.
#
cat > $tmp.control <<EOF
# 
# 1. Test duplicate directory names;  non-primary and then primary
#
LOCALHOSTNAME	n   n	$logdir -c $logdir/config
LOCALHOSTNAME	y   n	$logdir	-c $logdir/config
# 
# 2. Test duplicate directory names ; primary and then non-primary
#
LOCALHOSTNAME	y   n	$logdir	-c $logdir/config
LOCALHOSTNAME	n   n	$logdir	-c $logdir/config
# 
# 3. Test directory names which are subnames 
#
LOCALHOSTNAME	y   n   $logdir2  -c $logdir/config
LOCALHOSTNAME	n   n   $logdir -c $logdir/config
# 
# 4. Test when have hosts which are subnames
#
$host1	y   n   $logdir/$host1  -c $logdir/config
$host2	n   n   $logdir/$host2 -c $logdir/config
#
# 5. Putting all the cases together
#
LOCALHOSTNAME	y   n   $logdir.1 -c $logdir/config
LOCALHOSTNAME	n   n   $logdir.2 -c $logdir/config
$host1	        n   n   $logdir.3 -c $logdir/config
$host1	        n   n   $logdir.4 -c $logdir/config
$host2	        n   n   $logdir.5 -c $logdir/config
$host2	        n   n   $logdir.6 -c $logdir/config
EOF

# pmlogger_check runs as the user "pcp" ... fix ownership of output file
#
touch $tmp.log
$sudo chown $PCP_USER:$PCP_GROUP $tmp.log

# which tests to try out
test_set="1 2 3 4 5"

# For each test case: reset the loggers and log directories, build the
# control file for that case, show it (filtered) on stdout and then run
# pmlogger_check against it, showing the filtered log.  Unfiltered
# detail is appended to $seq.full as we go.
#
# The process listings use "grep -E" rather than egrep because recent
# GNU grep releases print an obsolescence warning on stderr for egrep.
#
for i in $test_set
do
    echo >>$here/$seq.full
    echo "=== test $i ===" >>$here/$seq.full
    echo "--- before _setup() ---" >>$here/$seq.full
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]ID|[p]mlogger' >>$here/$seq.full
    _setup
    echo "--- after _setup() ---" >>$here/$seq.full
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]ID|[p]mlogger' >>$here/$seq.full

    # create the control file from master
    _extract_control $i > $logdir/control
    echo "--- control ---" >>$here/$seq.full
    cat $logdir/control >>$here/$seq.full

    # let's see what control file we are using
    echo
    _filter < $logdir/control
    echo

    # the pmlogger configuration file every control line refers to
    echo "log mandatory on once pmcd.control.debug" >$logdir/config
    echo "--- pmlogger_check ---" >> $here/$seq.full
    $sudo -u $PCP_USER -g $PCP_GROUP sh -c "umask 022; $PCP_BINADM_DIR/pmlogger_check -V -c $logdir/control -l $tmp.log"
    $sudo cat $tmp.log | tee -a $here/$seq.full | _filter
done

echo | tee -a $here/$seq.full
echo "Restart tests while other pmloggers are running ..." | tee -a $here/$seq.full

# Usage: _pids_for_logdir dir
#
# Print the names of the files under $PCP_TMP_DIR/pmlogger that mention
# dir, with everything up to and including "pmlogger" and the following
# character removed.  Those files are named by pmlogger pid (see the
# "cat $PCP_TMP_DIR/pmlogger/$pid" below), so what is left is the pid.
#
_pids_for_logdir()
{
    grep -l $1 $PCP_TMP_DIR/pmlogger/[0-9]* | sed -e 's/.*pmlogger.//'
}

# now for the 5. case, terminate the pmloggers one at a time and
# check the pmlogger_check restarts the right one
#
# The process listings use "grep -E" rather than egrep because recent
# GNU grep releases print an obsolescence warning on stderr for egrep.
#
for log in $logdir.1 $logdir.2 $logdir.3 $logdir.4 $logdir.5 $logdir.6
do
    echo | tee -a $here/$seq.full
    echo "Kill off one pmlogger ..." | tee -a $here/$seq.full

    # find the one pmlogger for this directory, retrying once a second
    # (at most 9 retries) until exactly one pid is found
    pid=`_pids_for_logdir $log`
    try=1
    num_pid=`echo "$pid" | wc -w | sed -e 's/ *//g'`
    while [ $try -lt 10 -a $num_pid -ne 1 ]
    do
	echo "try $try num_pid=$num_pid: `echo $pid`" >>$here/$seq.full
	sleep 1
	pid=`_pids_for_logdir $log`
	num_pid=`echo "$pid" | wc -w | sed -e 's/ *//g'`
	try=`expr $try + 1`
    done
    if [ $num_pid -ne 1 ]
    then
	# no unique victim ... dump the evidence to $seq.full and give up,
	# status is still 1 so the exit trap reports failure
	echo "Arrgh ... failed to find just one pmlogger to kill ... see $seq.full"
	echo "" >>$here/$seq.full
	echo "Arrgh ... failed to find just one pmlogger to kill ..." >>$here/$seq.full
	$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]ID|[p]mlogger' >>$here/$seq.full
	echo "$log contents ..." >>$here/$seq.full
	ls -l $log >>$here/$seq.full
	for ctl in $PCP_TMP_DIR/pmlogger/[0-9]*
	do
	    echo "+++ $ctl +++" >>$here/$seq.full
	    cat $ctl >>$here/$seq.full
	done
	echo "+ grep -l $log"' $PCP_TMP_DIR/pmlogger/[0-9]*' >>$here/$seq.full
	grep $log $PCP_TMP_DIR/pmlogger/[0-9]* >>$here/$seq.full
	exit
    fi

    # record the victim: its ps line (after checking the PID really is
    # in field 2 of the ps header), its pmlc status and its state file
    echo "Victim pid=$pid" >>$here/$seq.full
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS \
    | $PCP_AWK_PROG -v pid="$pid" >>$here/$seq.full '
NR == 1	{ print
	  if ($2 != "PID") {
	    print "PID not in field 2 of ps output!"
	    exit(1)
	  }
	  next
	}
$2 == pid { print; exit(0) }'
    echo "status" | pmlc $pid >>$here/$seq.full
    echo "+++ $PCP_TMP_DIR/pmlogger/$pid +++" >>$here/$seq.full
    cat $PCP_TMP_DIR/pmlogger/$pid >>$here/$seq.full

    # kill it, then run pmlogger_check which is expected to restart it
    $signal -s TERM $pid
    echo "Check ..." | tee -a $here/$seq.full
    $sudo -u $PCP_USER -g $PCP_GROUP sh -c "umask 022; $PCP_BINADM_DIR/pmlogger_check -V -V -c $logdir/control -l $tmp.log"
    if [ -f $tmp.log ]
    then
	$sudo cat $tmp.log \
	| tee -a $here/$seq.full \
	| sed -e '/^... try /d' \
	| _filter
    fi

    # Need to avoid race here.  Next iteration might try to start this
    # one again ... wait (up to 30 secs) for most recently started
    # pmlogger to get going ...
    #
    x=0
    rm -f $tmp.found
    while [ "$x" -lt 30 ]
    do
	pid=`_pids_for_logdir $log`
	if [ -n "$pid" ]
	then
	    echo "Found pid $pid after $x iterations" >>$here/$seq.full
	    echo "status" | pmlc $pid >>$here/$seq.full
	    touch $tmp.found
	    break
	fi
	x=`expr $x + 1`
	sleep 1
    done
    if [ ! -f $tmp.found ]
    then
	# nothing is logging to $log after 30 tries, report and give up
	echo "Arrgh ... pmlogger_check failed to restart pmloggers" | tee -a $here/$seq.full
	$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]ID|[p]mlogger' >>$here/$seq.full
	for state in $PCP_TMP_DIR/pmlogger/[0-9]*
	do
	    echo "=== $state ==="
	    cat $state
	done
	echo "See $seq.full for details"
	exit
    fi

done

# success, all done
status=0
exit
